
library(dplyr)
library(lme4)
library(ggplot2)
library(ggsci)
library(broom)
library(broom.mixed)
library(dotwhisker)
library(texreg)


# read data
df <- read.csv("data/features_labels_narrow.csv")

#
# Regressions
#
atts <- c("num_ppl", "pos_mst_avg", "age_mean", "age_std", "gender_entropy", 
          "p_glass", "race_entropy", "smile_coef_mean", "smile_coef_std")

# standardize data
for (att in atts) {
  df[,att] <- scale(df[,att], center=T, scale=T)
}

# fixed effects models
glmer_all <- glmer(
  escaped ~ num_ppl + age_mean + age_std + gender_entropy + race_entropy +
    p_glass + pos_mst_avg  + smile_coef_mean + smile_coef_std + (1 | location) + (1 | room), 
  data=df, 
  family = binomial
)

glmer_pre <- glmer(
  escaped ~ p_glass + num_ppl  + age_mean + age_std + gender_entropy + race_entropy + (1 | location) + (1 | room), 
  data=df, 
  family = binomial
)

glmer_post <- glmer(
  escaped ~ pos_mst_avg + smile_coef_mean + smile_coef_std + (1 | location) + (1 | room), 
  data=df, 
  family = binomial
)

# plot
var_names <- c(
  num_ppl = "Number of People",
  age_mean = "Age (Mean)",
  age_std = "Age (SD)",
  gender_entropy = "Gender Diversity",
  race_entropy = "Ethnic Diversity",
  p_glass = "Fraction of People with Glasses",
  pos_mst_avg = "Physical Proximity",
  smile_coef_mean = "Smiling Index (Mean)",
  smile_coef_std = "Smiling Index (SD)"
)

df_models <- rbind(
    tidy(glmer_all) %>% mutate(model = "Model 1 \n (All Characteristics)"),
    tidy(glmer_pre) %>% mutate(model = "Model 2 \n (Fixed Characteristics)"),
    tidy(glmer_post) %>% mutate(model = "Model 3 \n (Temporary Characteristics)")
  ) %>% 
  filter(term != "(Intercept)") %>% # remove fixed effects coeffs
  filter(!grepl("^sd_", term)) %>% # remove fixed effects coeffs
  relabel_predictors(var_names)

p_coef <- df_models %>% 
  dwplot(show_intercept = TRUE, dot_args=list(shape=20, size=2.8, alpha=1), whisker_args=list(size=0.7)) +
  geom_vline(xintercept = 0, colour = "grey60", linetype = 2) +
  annotate("rect", xmin=-Inf, xmax=Inf, ymin=seq(0.5, 9.5, 2), ymax=seq(1.5, 10.5, 2), alpha=0.1, fill="black") +
  facet_grid(~model) +
  scale_x_continuous(limits=c(-0.2, 0.8), breaks = scales::pretty_breaks()) + 
  xlab("Standardized Coefficient") + ylab("") +
  scale_color_aaas() +
  guides(color = "none") +
  theme_bw() + 
  theme(
    text = element_text(family="Roboto Condensed"),
    panel.border = element_rect(colour="black", fill=NA, size=0.7),
    axis.line = element_blank(),
    strip.background = element_rect(colour=NA, fill=NA),
    strip.text = element_text(size=12, hjust=0.5, face="bold"),
    axis.title.x = element_text(size=13),
    axis.text.x = element_text(size=11),
    axis.text.y = element_text(size=12, color="black"),
    plot.margin=grid::unit(c(0,0,0,0), "mm")
  )

print(p_coef)

save_plot(
  "plots/pdfs/coefficient_plot.pdf",
  p_coef,
  device = cairo_pdf,
  base_height = 3.5,
  base_width = 12
)

# generate a regression table
screenreg(
  list(glmer_all = glmer_all, glmer_pre = glmer_pre, glmer_post = glmer_post),
  custom.coef.names = c("(Intercept)", var_names)
)

#
# Sanity check
# Compare the glmer models with glm models
#
# glm models
glm_all <- glm(
  escaped ~ num_ppl + age_mean + age_std + gender_entropy + race_entropy +
    pos_mst_avg + p_glass + smile_coef_mean + smile_coef_std + location + room, 
  data=df, 
  family = "binomial"
)

glm_pre <- glm(
  escaped ~ num_ppl + age_mean + age_std + gender_entropy + race_entropy + p_glass  + location + room, 
  data=df, 
  family = "binomial"
)

glm_post <- glm(
  escaped ~ pos_mst_avg + smile_coef_mean + smile_coef_std + location + room, 
  data=df, 
  family = "binomial"
)

screenreg(
  list(
    glmer_all = glmer_all, glm_all = glm_all, 
    glmer_pre = glmer_pre, glm_pre = glm_pre, 
    glmer_post = glmer_post, glm_post = glm_post
  )
)

# END